/*** tabdata3.do

This do-file matches the census codes in each year with unit_code_08 codes used for
the rest of the tabular data.

This do-file includes the following parts:
1. Ready 1990 Aggregate Census Data from Harvard & Merge to Loren's Data 
2. Merge 1990 Census Data Sets to the Correspondence Table
3. Merge Census and Tabular Data Together in 2000 and 2005
4. Ready 2010 Census Data 
5. Ready 1982 Census Data
6. Merge Census Data Sets to the Correspondence Table
7. Create Predicted GDP Variables
8. Label Census Variables
9. Scale up census numbers to represent populations

***/


* Session setup: start from an empty dataset, switch off -more- pauses, and
* open a fresh text log (any log still open from an earlier run is closed first).
clear
set more off
* NOTE(review): -set mem- is presumably ignored by recent Stata releases; confirm before removing.
set mem 100m
capture log close
log using tabdata3.log, replace text


******* 1. Ready 1990 Aggregate Census Data from Harvard & Merge to Loren's Data ******************

* Join the two 1990 aggregate census extracts on the county code gbcenmq.
use ..\..\data\census\source\chinaad190.dta, clear
sort gbcenmq
merge gbcenmq using ..\..\data\census\source\chinaae190.dta
tabulate _merge
drop _merge

* String version of the code, used to test its last two digits.
gen countyCode = string(gbcenmq)

*** These are missing: assign a6 from the two-digit code suffix
replace a6 = 5 if substr(countyCode,-2,.)=="51"
replace a6 = 6 if inlist(substr(countyCode,-2,.),"52","53")

*** This is a miscode based on the corr table
replace a6 = 8 if inlist(gbcenmq, 430312, 410412)

*** These are strange locations
drop if a6<0

* Prefecture-level code: the county code with its last two digits zeroed.
gen city_codec = 100*int(gbcenmq/100)

** Consolidate the few cases of multiple codes for the same prefecture
foreach p in 11 12 31 {
	replace city_codec = `p'0000 if inlist(city_codec, `p'0100, `p'0200)
}
replace city_codec = 500000 if city_codec==510200

save temp_census.dta, replace

*** Break off CP and aggregate, works since units are mutually exclusive
* Keep units with a6<=7, give every unit in a prefecture the name of its
* lowest-coded unit, and sum all a* variables to the prefecture level.
keep if a6<=7
bysort city_codec (gbcenmq): replace nmcenmq = nmcenmq[1]
collapse (sum) a*, by(city_codec nmcenmq)
rename city_codec city_code
gen year = 1990
sort city_code year
save ..\..\data\tabular_data_BJ\generated\cp90.dta, replace

*** Break off 2 obs for use later
* Sector employment for two prefectures; sector 3 is the remainder of a381.
keep if inlist(city_code, 430800, 441400)
gen cemp_sect1 = a384+a390
gen cemp_sect2 = a387
gen cemp_sect3 = a381-cemp_sect2-cemp_sect1
keep city_code year cemp_sect1 cemp_sect2 cemp_sect3
rename city_code city05
sort city05 year
save tempcen90.dta, replace

*** Build PF Data
* Same prefecture-level aggregation as above, but over every unit in temp_census.dta.
use temp_census.dta
bysort city_codec (gbcenmq): replace nmcenmq = nmcenmq[1]
collapse (sum) a*, by(city_codec nmcenmq)
rename city_codec city_code
gen year = 1990
sort city_code year
save ..\..\data\census\generated\pf90.dta, replace

*** Break off county level units (add back some prematurely promoted county cities & qu)
use temp_census.dta, clear
keep if a6>6 | ///
	inlist(gbcenmq, 371101, 420401, 460151, 460251, 140602, 220502, ///
	                330402, 330902, 341002, 370502, 420802, 420851, ///
	                430703, 441802, 441901, 442001, 510802, 511102, ///
	                520201, 620402, 620502, 530201)

* Deal with one a6=9 unit which is a county subset: fold it into code 342421
replace gbcenmq = 342421 if gbcenmq == 342401

* Use one name pair per county code so the collapse yields one row per code.
sort gbcenmq
foreach nm of varlist nmcenmq nmhanzi {
	by gbcenmq: replace `nm' = `nm'[1]
}

* a5 and a6 are dropped so that they are not summed with the a* counts.
drop a5 a6
collapse (sum) a* (mean) city_codec, by(gbcenmq nmcenmq nmhanzi)
gen countyCode = string(gbcenmq)

** Merge County units to Loren's Data
** Recode a few units whose codes do not match count1990.dta but which align based on population
replace countyCode = "220180" if countyCode=="220181"
replace countyCode = "220383" if countyCode=="220381"
replace countyCode = "220584" if countyCode=="220581"
replace countyCode = "220585" if countyCode=="220582"
replace countyCode = "230180" if countyCode=="230181"
replace countyCode = "230880" if countyCode=="230881"
replace countyCode = "320280" if countyCode=="320282"
replace countyCode = "320380" if countyCode=="320381"
replace countyCode = "320580" if countyCode=="320583"
replace countyCode = "320680" if countyCode=="320681"
replace countyCode = "320880" if countyCode=="320882"
replace countyCode = "320980" if countyCode=="320981"
replace countyCode = "321080" if countyCode=="321083"
replace countyCode = "321180" if countyCode=="321181"
replace countyCode = "350480" if countyCode=="350481"
replace countyCode = "350580" if countyCode=="350581"
replace countyCode = "360480" if countyCode=="360481"
replace countyCode = "371198" if countyCode=="371101"
replace countyCode = "410480" if countyCode=="410482"
replace countyCode = "410780" if countyCode=="410782"
replace countyCode = "410880" if countyCode=="410882"
replace countyCode = "411080" if countyCode=="411081"
replace countyCode = "420803" if countyCode=="420851"
replace countyCode = "441998" if countyCode=="441901"
replace countyCode = "442098" if countyCode=="442001"
replace countyCode = "510180" if countyCode=="510181"
replace countyCode = "510680" if countyCode=="510681"
replace countyCode = "510780" if countyCode=="510781"
replace countyCode = "511180" if countyCode=="511181"
* Old-syntax match merge: the master must be sorted on the key first.
sort countyCode
merge countyCode using ..\..\data\census\generated\count1990.dta
tab _merge
* Keep this merge indicator under another name; it is cross-tabulated in section 2.
rename _merge mrgcen
sort countyCode

**  add transportation variable in 1990 (HY Aug 20, 2011)
merge countyCode using ..\..\data\census\generated\transport1990.dta
tab _merge
drop _merge

* Numeric census code, a copy of it (cencode), and a province code obtained by
* zeroing the last four digits.
gen census_code = real(countyCode)
gen cencode = census_code
gen province_code = 10000*int(census_code/10000)
sort census_code
save temp_census.dta, replace


*********** 2. Merge 1990 Census Data Sets to the Correspondence Table *********************

* Start from the 1990 rows of the correspondence table.
use ..\..\data\correspondence_tables\generated\correspondence_82_10.dta
keep if year==1990

*** Merge on RZ's codes
sort unit_code_08
merge unit_code_08 using ..\..\data\correspondence_tables\source\census_match90.dta
tab _merge

** These are from the 2 prefectures we dropped
drop if _merge==2
*** These are added obs in which census_code is the same 
* Rows found only in the correspondence table take unit_code_08 as their census code.
replace census_code = unit_code_08 if _merge==1
drop _merge

*** Merge on Census data
sort census_code
merge census_code using temp_census.dta, update
*** Drop counties in provinces outside of study area
* After sorting on _merge within province, fmrg/lmrg are the smallest and largest
* _merge values in the province; both equal to 2 means every row in that province
* came only from the census file.
sort province_code _merge
by province_code: gen fmrg = _merge[1]
by province_code: gen lmrg = _merge[_N]
drop if fmrg==2 & lmrg==2
drop fmrg lmrg
tab _merge
tab _merge mrgcen
*browse unit_code_08 census_code unit_name nmcen _merge mrgcen if _merge<3 | mrgcen<3

/** The rest have been handchecked and need not be matched 
they are individual urban districts or outside of the study area **/
drop if _merge==2
drop _merge mrgcen

sort unit_code_08 year
save temp_census.dta, replace


********** 3. Merge Census and Tabular Data Together in 2000 and 2005 **************

** Ready sample census data from other years
use ..\..\data\census\generated\count2000.dta, clear

**  add transportation variable in 2000 (HY Aug 20, 2011)
* Old-syntax match merge: the master must be sorted on the key first.
sort countyCode
merge countyCode using ..\..\data\census\generated\transport2000.dta
tab _merge
drop _merge

* The 2000 rows carry a numeric census_code; the countyCode they were built from is dropped.
gen year = 2000
gen census_code = real(countyCode)
drop countyCode

save cnt2000_temp.dta, replace

**  add transportation variable in 2005 (HY Aug 20, 2011)
use ..\..\data\census\generated\count2005.dta, clear
* Sort on the merge key here too, as the 2000 branch does: the old-syntax merge
* aborts when the master is not sorted, and nothing in this file guarantees that
* count2005.dta was saved sorted by countyCode.
sort countyCode
merge countyCode using ..\..\data\census\generated\transport2005.dta
tab _merge
drop _merge
save cnt2005_temp.dta, replace

**** Ready Year 2000-2005 Data
use cnt2000_temp.dta, clear

* Appended 2005 rows have no year or census_code yet: fill both in.
append using cnt2005_temp.dta
replace year = 2005 if year==.
replace census_code = countyCode if year==2005
* cencode keeps a copy of the census code as it stands at this point.
gen cencode = census_code
sort census_code year
save cen0005.dta, replace

*Use US123, keep pop and codes in 1990, 2000 and 2005
use ..\..\data\tabular_data_BJ\generated\us123.dta

* Attach the 1990 census rows built in section 2 (temp_census.dta) by unit and year.
sort unit_code_08 year
merge unit_code_08 year using temp_census.dta
tab year _merge
drop _merge

/** Get the prefecture codes correct for 1990: Note city_codec is missing 
for most urban districts, but city code is always correct in these cases**/
replace city_code = city_codec if year==1990 & city_codec~=.
*** Leave subsequent years as are, though there may be some errors in 1990

/*** Make a guess at what census code might be for later years and merge on that, then
try unit_code_08 instead.  Checks reveal no discrepancies between these two merges
so we can safely treat them as mutually exclusive. **/
sort unit_code_08 year
* Copy the census_code of each unit's first (earliest-year) row to all of its rows.
by unit_code_08: replace census_code = census_code[1]
replace census_code = 230109 if unit_code_08==230109 & year==2005
* Where there is still no census code, fall back on unit_code_08 itself.
replace census_code = unit_code_08 if census_code==.
sort census_code year
merge census_code year using cen0005.dta, update
tab _merge year if year==2000|year==2005
move empTransport ruralMig
* _merge is kept in temp.dta on purpose: the next steps split the file on it.
save temp.dta, replace

*** Break off obs that did not merge on
* Keep only the cen0005.dta rows that found no match (_merge==2) and flag them
* with mrg1 = 1; mrg1 is part of the key when these rows are merged back in section 6.
keep if _merge==2
drop _merge
gen mrg1 = 1
*** Change remaining census codes based on visual inspection
* Recodes without a year condition apply to every year present for that code.
replace census_code = 130207 if census_code==130282 & year==2000
replace census_code = 141102 if census_code==142302 & year==2000
replace census_code = 141181 if census_code==142301 & year==2000
replace census_code = 141182 if census_code==142303 & year==2000
replace census_code = 220605 if census_code==220625
replace census_code = 230109 if census_code==230107 & year==2005
replace census_code = 230112 if census_code==230181
replace census_code = 320205 if census_code==320283 & year==2000
replace census_code = 320206 if census_code==320212 & year==2000
replace census_code = 320412 if census_code==320483 & year==2000
replace census_code = 320506 if census_code==320586 & year==2000
replace census_code = 320803 if census_code==320882 & year==2000
replace census_code = 320903 if census_code==320928 & year==2000
replace census_code = 321311 if census_code==321321 & year==2000
replace census_code = 330110 if census_code==330184 & year==2000
replace census_code = 330502 if census_code==330501 & year==2005
replace census_code = 340208 if census_code==340204 & year==2005
replace census_code = 341600 if census_code==341602 & year==2000
replace census_code = 370911 if census_code==370903
replace census_code = 410311 if census_code==410307
replace census_code = 420506 if census_code==420521 & year==2000
replace census_code = 440513 if census_code==440582 & year==2000
replace census_code = 440515 if census_code==440583 & year==2000
replace census_code = 440560 if census_code==440510 & year==2000
replace census_code = 440561 if census_code==440509 & year==2000
replace census_code = 440511 if census_code==440508 & year==2000
replace census_code = 440605 if census_code==440681 & year==2000
replace census_code = 440606 if census_code==440682 & year==2000
replace census_code = 440607 if census_code==440683 & year==2000
replace census_code = 440608 if census_code==440684 & year==2000
replace census_code = 440705 if census_code==440782 & year==2000
replace census_code = 441303 if census_code==441381 & year==2000
replace census_code = 441900 if census_code==441901
replace census_code = 442000 if census_code==442001
replace census_code = 451102 if census_code==452402 & year==2000
replace census_code = 451281 if census_code==452702 & year==2000
replace census_code = 500115 if census_code==500221 & year==2000
replace census_code = 500116 if census_code==500381
replace census_code = 500117 if census_code==500382
replace census_code = 500118 if census_code==500383
replace census_code = 500119 if census_code==500384
replace census_code = 510903 if census_code==510902
replace census_code = 530502 if census_code==533001 & year==2000
replace census_code = 530801 if census_code==532701 & year==2000
replace census_code = 640325 if census_code==640303 & year>=2000
replace census_code = 640502 if census_code==640321 & year==2000
replace census_code = 640521 if census_code==640322  & year==2000
* Overwrite cen0005.dta with just these recoded, previously unmatched rows,
* sorted on the key used by the second merge attempt.
sort mrg1 census_code year
save cen0005.dta, replace


**************** 4. Ready 2010 Census Data **********************

* Import each 2010 county census workbook (suffix A, then L), restrict it to the
* study area, and save it as a Stata dataset sorted by GbCounty.  The L file is
* the one left in memory when the loop ends.
foreach suffix in A L {

	import excel "..\..\data\census\source\2010CountyCensus`suffix'.xlsx", sheet("Sheet1") firstrow clear

	* drop vars in Chinese
	drop County_CH City_CH Prov_CH

	* Give county 620201 province code 62 before the province filter runs
	replace GbProv = "62" if GbCounty=="620201"

	*** drop provinces which are out of study area
	*** - Neimenggu Hainan Xizang Qinghai Xinjiang
	drop if inlist(GbProv, "15", "46", "47", "54", "63", "65")

	*** drop prefectures which are out of study area

	/*
	GbCity	City_EN	GbProv	Prov_EN

	5323	Chuxiongyizu	53	Yunnan
	5325	Honghehanizuyizu	53	Yunnan
	5326	Wenshanzhuangzumiaozu	53	Yunnan
	5328	Xishuangbannadaizu	53	Yunnan
	5329	Dalibaizu	53	Yunnan
	5331	Dehongdaizujingpozu	53	Yunnan
	5333	Nujianglisuzu	53	Yunnan
	5334	Diqingcangzu	53	Yunnan

	6401	Yinchuan	64	Ningxia
	6402	Shizuishan	64	Ningxia

	*/

	* Two inlist() calls keep each string list short
	drop if inlist(GbCity, "5323", "5325", "5326", "5328", "5329") | ///
	        inlist(GbCity, "5331", "5333", "5334", "6401", "6402")

	* Province identifiers first, then city identifiers, then everything else
	order GbProv Prov_EN GbCity City_EN

	sort GbCounty
	save "2010CountyCensus`suffix'", replace
}


*---------------------------------------------------------------
* Part 1.2: combine the two 2010 extracts, one row per GbCounty
* - master: 2010CountyCensusL (still in memory after the import loop)
* - using:  2010CountyCensusA.dta
*---------------------------------------------------------------

* No merge indicator is kept
merge 1:1 GbCounty using "2010CountyCensusA", nogenerate // all counties are "_merge==3"

*---------------------------------------------------------------
* Part 1.3: change variable names and keep only variables of interest
* - resulting data set is 
* census_county2010m.dta
*---------------------------------------------------------------
* NOTE(review): the file actually saved further down is cnt2010_temp.dta, not
* census_county2010m.dta -- confirm which name the header should carry.

// "original name of the variable" "our variable name" "label of the variable"
// Each list entry is one string: word 1 is the census variable name, word 2 is
// the new name, and everything after the "@" is the variable label.
// NOTE(review): the L500007 label contains mis-encoded bytes; left untouched
// here because the string is read at run time.

#delimit ;
foreach x in 
"A100001	a102	@Total Population"
"A100002	a103	@Total Males"
"A100003	a104	@Total Females"
"A100008	a117	@Urban Population"
"A100009	a120	@Rural Population"

"A200001	a185a	@Total Males at 0 Age"
"A200002	a186a	@Total Females at 0 Age"
"A200003	a185b	@Total Males at 1-4 Age"
"A200004	a186b	@Total Females at 1-4 Age"
"A200005	a188	@Total Males at 5-9 Age"
"A200006	a189	@Total Females at 5-9 Age"
"A200007	a191	@Total Males at 10-14 Age"
"A200008	a192	@Total Females at 10-14 Age"
"A200009	a194	@Total Males at 15-19 Age"
"A200010	a195	@Total Females at 15-19 Age"

"A200025	aN2025	@Total Males at 55-59 Age"
"A200026	aN2026	@Total Females at 55-59 Age"
"A200027	aN2027	@Total Males at 60-64 Age"
"A200028	aN2028	@Total Females at 60-64 Age"
"A200029	aN2029	@Total Males at 65-69 Age"
"A200030	aN2030	@Total Females at 65-69 Age"
"A200031	aN2031	@Total Males at 70-74 Age"
"A200032	aN2032	@Total Females at 70-74 Age"
"A200033	aN2033	@Total Males at 75-79 Age"
"A200034	aN2034	@Total Females at 75-79 Age"
"A200035	aN2035	@Total Males at 80-84 Age"
"A200036	aN2036	@Total Females at 80-84 Age"
"A200037	aN2037	@Total Males at 85 Age and over"
"A200038	aN2038	@Total Females at 85 Age and over"


"A300012	aN3012	@Total Population of Migration from the Same County "
"A300013	aN3013	@Total Population of Migration from Other Counties in the Same Province"
"A300014	aN3014	@Total Population of Migration from Other Provinces"

"A400003	a280	@Total Male with Primary School Education"
"A400004	a281	@Total Female with Primary School Education"
"A400005	a277	@Total Male with Junior Middle School Education"
"A400006	a278	@Total Female with Junior Middle School Education"
"A400007	a271_274	@Total Male with Senior High School Education"
"A400008	a272_275	@Total Female with Senior High School Education"
"A400009	a268	@Total Male with Junior College Education"
"A400010	a269	@Total Female with Junior College Education"
"A400011	a265	@Total Male with University and above Education"
"A400012	a266	@Total Female with University and above Education"
"A400013	aN4013	@Average Education Years"

"L500003	a434	@Sub-Total Employed Pop. of Responsible Persons of Government Offices,Central Committee of The Communist Party of China ,Different Local Organizations,Insfitution Unit and Enterprise"
"L500004	a431	@Sub-Total Employed Pop. of  Professional/Technical"
"L500005	a437	@Sub-Total Employed Pop. of  Clerk and Related Workers"
"L500006	aN5006	@Sub-Total Employed Pop. of  Commerce,Service Trade Personnel"
"L500007	a446	@Sub-Total Employed Pop. of�Crop Cultivation Production,�Forestry Production,�Animal Husbandry Production,�Fishery Production�and Fishery Production Personnel"
"L500008	a449	@Sub-Total Employed Pop. of  Production,Transport Equipment Operators and Related Personnel"
"L500009	a451	@Sub-Total Employed Pop. of  Not Stated"

"L600002	a381	@Total Employed Population by Industry"
"L600003	aN6003	@Sub-Total Employed Pop. in Agricultural Industry"
"L600004	aN6004	@Sub-Total Employed Pop. in Mining Industry"
"L600005	aN6005	@Sub-Total Employed Pop. in Manufacturing Industry"
"L600006	aN6006	@Sub-Total Employed Pop. in Production and Supply of Electric Power, Gas and Water Industry"
"L600007	aN6007	@Sub-Total Employed Pop. in Construction Industry"
"L600008	aN6008	@Sub-Total Employed Pop. in Storage and Postal Industry"
"L600009	aN6009	@Sub-Total Employed Pop. in Transportation, Computer Services and Software Industry "
"L600010	aN6010	@Sub-Total Employed Pop. in Information Transfer, Wholesale and Retail Trade Industry "
"L600011	aN6011	@Sub-Total Employed Pop. in Hotel and Restaurants Industry"
"L600012	aN6012	@Sub-Total Employed Pop. in Financial Industry"
"L600013	aN6013	@Sub-Total Employed Pop. in Real Estate Industry"
"L600014	aN6014	@Sub-Total Employed Pop. in Leasing and Business Services Industry"
"L600015	aN6015	@Sub-Total Employed Pop. in Scientific Research and Polytechnic Services and Geological Prospecting Industry"
"L600016	aN6016	@Sub-Total Employed Pop. in Water Conservancy, Environment and Public Facilities Management Industry"
"L600017	aN6017	@Sub-Total Employed Pop. in Resident and Other Services Industry"
"L600018	aN6018	@Sub-Total Employed Pop. in Education Industry"
"L600019	aN6019	@Sub-Total Employed Pop. in Health Care, Social Security and Social Welfare"
"L600020	aN6020	@Sub-Total Employed Pop. in Culture, Sports and Entertainment Industry"
"L600021	aN6021	@Sub-Total Employed Pop. in Public Administration and Social Organizations"
"L600022	aN6022	@Sub-Total Employed Pop. in International Organizations"

{;

local censusID = word("`x'", 1);
local ourID = word("`x'", 2);
local ourLabel = substr("`x'", strpos("`x'", "@")+1,.);

ren `censusID' `ourID' ;
label var `ourID' "`ourLabel'" ;

};
#delimit cr

*** a185 def. is male pop age 0-4
*** so add male age 0 + male age 1-4, same for female (a186)
foreach s in 185 186 {
	gen a`s' = a`s'a + a`s'b
}
label var a185 "Total Males at 0-4 Age"
label var a186 "Total Females at 0-4 Age"
drop a185a a185b a186a a186b

*** add another age category: Age 55 or older (aN_*_55older)
* Odd-numbered aN20xx variables are the male age bands, even-numbered the female ones.
gen aN_m_55older = aN2025 + aN2027 + aN2029 + aN2031 + aN2033 + aN2035 + aN2037
gen aN_f_55older = aN2026 + aN2028 + aN2030 + aN2032 + aN2034 + aN2036 + aN2038
gen aN_t_55older = aN_m_55older + aN_f_55older

label var aN_m_55older "Total Males at Age 55 or older"
label var aN_f_55older "Total Females at Age 55 or older"
label var aN_t_55older "Total Population at Age 55 or older"

*** generate male + female variables for population by age categories
* For each total a`t', the male count is a(t+1) and the female count is a(t+2).
forvalues t = 184(3)193 {
	gen a`t' = a`=`t'+1' + a`=`t'+2'
}

label var a184 "Total Population at 0-4 Age"
label var a187 "Total Population at 5-9 Age"
label var a190 "Total Population at 10-14 Age"
label var a193 "Total Population at 15-19 Age"

*** drop unnecessary variables: census columns that were never renamed, and the
*** 55+ age bands now summarised by aN_*_55older
drop A* L*
drop aN2025-aN2038

* Unit code = the six leading digits of GbCounty, converted to a number.
gen unit_code_08 = regexs(1) if regexm(GbCounty, "(^[0-9][0-9][0-9][0-9][0-9][0-9])")
destring unit_code_08, replace
gen year = 2010
sort unit_code_08 year
save cnt2010_temp.dta, replace


**************** 5. Ready 1982 Census Data **********************

** Ready additional census data sets
clear
insheet using ..\..\data\census\source\population1982&2000_county.csv
** Drop counties with no info
drop if actpop==.
keep unit_code_08 year actpop
**These are CP level data integrated elsewhere
drop if unit_code_08==-9999|unit_code_08==.
* Relabel rows dated 1983 and 2001 as 1982 and 2000.
replace year = 1982 if year==1983
replace year = 2000 if year==2001
sort unit_code_08 year
save temp820005.dta, replace

clear
use ..\..\data\census\generated\count1982.dta

gen year = 1982
* Stored as double because a fractional code is added for two units below.
gen double unit_code_08 = real(countyCode)

*** Two counties span 2005 definition prefectures.  Split them evenly
* Halve every variable from totalPop through male19to55WorkCollege for the two
* counties, then duplicate their rows with -expand-.
* NOTE(review): both copies end up with the same unit_code_08 (original + .1);
* confirm that this is intended and that it is handled downstream.
gen expnd = 1
replace expnd = 2 if unit_code_08==412632 | unit_code_08==620121
replace unit_code_08 = unit_code_08+.1 if expnd==2
foreach X of varlist totalPop-male19to55WorkCollege {
   replace `X' = `X'/2 if expnd==2
}
expand expnd

sort unit_code_08 year
save temp82.dta, replace

* Re-save temp.dta with its merge indicator renamed to _m, so that it survives
* the merges in section 6, and sorted on the key used there.
use temp.dta, clear
rename _merge _m
sort unit_code_08 year
save temp_1.dta, replace


********** 6. Merge Census Data Sets to the Correspondence Table *********************

* Start from the 2010 rows of the correspondence table.
use ..\..\data\correspondence_tables\generated\correspondence_82_10.dta
keep if year==2010

*** Merge on RZ's codes
sort unit_code_08 year
merge unit_code_08 year using cnt2010_temp.dta
tab _merge
** 1s are all special districts with no census data available
drop _merge

*** Merge on Census data
* temp_1.dta is temp.dta with its earlier merge indicator stored as _m.
sort unit_code_08 year
merge unit_code_08 year using temp_1.dta, update

tab _merge if year==2010 /*Five units in Shantou should be included in 2010*/
* Discard this merge's indicator and restore the earlier one under the name _merge.
drop _merge
rename _m _merge

save temp.dta, replace

** Try merging again for the units that didn't merge
use temp.dta
drop if _merge==2
replace census_code = unit_code_08 if _merge==1 & year>1990
* mrg1 = 1 marks 2000/2005 rows that had no census match; cen0005.dta now holds
* only recoded unmatched census rows, all with mrg1 = 1.
gen mrg1 = _merge==1 & (year==2000|year==2005)
drop _merge
sort mrg1 census_code year 
merge mrg1 census_code year using cen0005.dta, update
tab _merge year if (year==2000|year==2005) & mrg1==1 & unit_status>0
** Drop _merge=2 obs b/c they are outside of study area or couldn't be matched
drop if _merge==2
drop mrg1 _merge

*** Clean up census and city codes
* Replace the working census_code with the cencode copy, under the original name.
drop census_code
rename cencode census_code
drop city_codec

*** Merge on 1982 Sample Data
sort unit_code_08 year
merge unit_code_08 year using temp82.dta, update
tab year _merge

*Should have _merge=2 units in 1982 that are outside our study area
*** These are out of our sample area or involve units split across prefectures
drop if _merge==2
drop _merge

*** Merge on 820005 Full Population Data
sort unit_code_08 year
merge unit_code_08 year using temp820005.dta, update
tab year _merge
** these are special districts or uncollected data
drop _merge

* Rename and label the sample-census variables.  Each quoted entry holds, in
* order: the current variable name, the new name, and the label text.
foreach list in     		"totalPop 					c_totalPop 				census total pop"   ///
                                "emp                                     c_emp                  census total emp"   ///
                                "empAgr                              c_emp_ag          census agricultural emp"   ///
                                "empMining                                       c_emp_min       census mining emp"   ///
                                "empManu                                         c_emp_man                      census manufacturing emp"   ///
                                "empManuTrad                                c_emp_man_trad           census manu trad emp"   ///
                                "empManuChem                             c_emp_man_chem        census manu chem emp"   ///
                                "empManuHeavy                            c_emp_man_hvy            census manu heavy emp"   ///
                                "empManuHighTech                      c_emp_man_hitech       census manu hightech emp"   ///
                                "empUtil                              c_emp_util                         census util emp"   ///
                                "empCons                           c_emp_cons                      census cons emp"   ///
                                "empSoft                            c_emp_soft                       census soft emp"   ///
                                "empWhole                                       c_emp_whsl                      census wholesale emp"   ///
                                "empFina                            c_emp_fina                        census finance emp"   ///
                                "empRes                             c_emp_res                         census res emp"   ///
                                "empCul                              c_emp_cul                          census cul emp"   ///
                                "empPub                             c_emp_pub                       census pub emp"   ///
								"empTransport                             c_emp_tran                       census transportation emp"   ///
                                "ruralMig                             c_rur_Mig           census rural migrant"   ///
                                "urbanMig                          c_urb_Mig          census urban migrant"   ///
                                "ruralMig19to55                               c_rur_mig_19to55_        census rural migrant 19-55"   ///
                                "ruralMig19to55High                      c_rur_mig_19to55_h      census rural migrant 19-55 high"   ///
                                "urbanMig19to55                             c_urb_mig_19to55_       census urban migrant 19-55"   ///
                                "urbanMig19to55High                    c_urb_mig_19to55_h     census urban migrant 19-55 high"   ///
                                "female19to55                                  c_f_19to55                         census female 19-55"   ///
                                "femaleUnder19                              c_f_Under19                     census female <19"   ///
                                "femaleOver55                                 c_f_Over55                        census female >55"   ///
                                "female19to55High         c_f_19to55_h                    census female 19-55 high"   ///
                                "female19to55Work                       c_f_19to55_w_                census female 19-55 work"   ///
                                "female19to55WorkHigh                              c_f_19to55_w_h                              census female 19-55 work high"   ///
                                "female19to55WorkCollege        c_f_19to55_w_c                              census female19-55 work college"   ///
                                "male19to55                                      c_m_19to55      census male 19-55"   ///
                                "maleUnder19                                  c_m_Under19                   census male <19"   ///
                                "maleOver55                                     c_m_Over55                      census male >55"   ///
                                "male19to55High                             c_m_19to55_h                  census male 19-55 high"   ///
                                "male19to55Work                           c_m_19to55_w_                              census male 19-55 work"   ///
                                "male19to55WorkHigh                  c_m_19to55_w_h           census male 19-55 work high"   ///
                                "male19to55WorkCollege            c_m_19to55_w_c            census male 19-55 work college"   ///
                                "ruralMigA                          c_rur_mig_A                      census rural migrantA"   ///
                                "urbanMigA                                       c_urb_mig_A                     census urban migrant A"   ///
                                "ruralMig19to55A                            c_rur_mig_19to55_A     census rural migrant 19-55 A"   ///
                                "ruralMig19to55HighA                   c_rur_mig_19to55_h_A    census rural migrant 19-55 high A"   ///
                                "urbanMig19to55A                          c_urb_mig_19to55_A    census urban migrant 19-55 A"   ///
                                "urbanMig19to55HighA                                 c_urb_mig_19to55_h_A               census urban migrant 19-55 high A"   ///
                                "ruralOutBirth                                   c_rur_ob_                           census rural out birth"   ///
                                "urbanOutBirth                                 c_urb_ob_                          census urban out birth"   ///
                                "ruralOutBirth19to55      c_rur_ob_19to55_          census rural out birth 19-55"   ///
                                "ruralOutBirth19to55High             c_rur_ob_19to55_h        census rural out birth 19-55 high"   ///
                                "urbanOutBirth19to55                   c_urb_ob_19to55_         census urban out birth 19-55"   ///
                                "urbanOutBirth19to55High          c_urb_ob_19to55_h       census urban out birth 19-55 high"   ///
                                "ruralOutBirthA1                              c_rur_ob_A1                     census rural out birth A1"   ///
                                "urbanOutBirthA1                           c_urb_ob_A1                    census urban out birth A1"   ///
                                "ruralOutBirth19to55A1                                c_rur_ob_19to55_A1     census rural out birth 19-55 A1"   ///
                                "ruralOutBirth19to55HighA1       c_rur_ob_19to55_h_A1                census rural out birth 19-55 high A1"  ///
                                "urbanOutBirth19to55A1              c_urb_ob_19to55_A1    census urban out birth 19-55 A1"   ///
                                "urbanOutBirth19to55HighA1     c_urb_ob_19to55_h_A1               census urban out birth 19-55 high A1"  ///
                                "ruralOutBirthA2                              c_rur_ob_A2                     census rural out birth A2"   ///
                                "urbanOutBirthA2                           c_urb_ob_A2                    census urban out birth A2"   ///
                                "ruralOutBirth19to55A2                                c_rur_ob_19to55_A2     census rural out birth 19-55 A2"   ///
                                "ruralOutBirth19to55HighA2       c_rur_ob_19to55_h_A2                census rural out birth 19-55 high A2"  ///
                                "urbanOutBirth19to55A2              c_urb_ob_19to55_A2    census urban out birth 19-55 A2"   ///
                                "urbanOutBirth19to55HighA2     c_urb_ob_19to55_h_A2               census urban out birth 19-55 high A2"  {
                * Words 1 and 2 of the entry are the old and new variable names
                local  var_old = word("`list'",1)
                local  var_new = word("`list'",2)
                * The label is what is left after removing those two words and trimming
                local  lab0 = trim(subinword("`list'","`var_old'","",1))
                local lab = trim(subinword("`lab0'","`var_new'","",1))
                rename `var_old' `var_new'
                label var `var_new' "`lab'"
                }

*** Set census variables to 0 in special districts
* Only missing values (.) are replaced, and only for units whose unit_status is 0 or -1.
foreach cvar of varlist c_* a1* a2* a3* a4* a7* a8* aN* {
	replace `cvar' = 0 if `cvar'==. & inlist(unit_status, 0, -1)
}

************** 7. Create Predicted GDP Variables **************

**** 1990 Rural County GDP
*** Attach the CP census data for the urban units that have to be imputed
sort city05 year
merge city05 year using tempcen90.dta
tab _merge if year==1990
drop _merge
*** Attach the city proper gdp data
* _merge from this second merge is kept alive: the impute flag below reads it.
sort city05 year
merge city05 year using ..\..\data\tabular_data_BJ\generated\cp90_gdp.dta
tab _merge year
* Sector 1 is the residual of the total less sectors 2 and 3
gen cgdp_sect1 = cgdp_py-cgdp_sect2-cgdp_sect3
*** Urban units: flag one row per city05-year so that each city figure enters
*** the province sum exactly once
bysort city05 year: gen first = _n==1
forvalues s = 1/3 {
    egen ctgdp_sect`s' = sum(cgdp_sect`s'*first), by(province_code90 year)
}
*** County cities: province sums of the unit-level figures
gen gdp_sect1 = gdp_py-gdp_sect2-gdp_sect3
forvalues s = 1/3 {
    egen ccgdp_sect`s' = sum(gdp_sect`s'), by(province_code90 year)
}
*** Province remainders: province total less the two sums above
forvalues s = 1/3 {
    gen dgdp_sect`s' = pgdp_sec`s'-ctgdp_sect`s'-ccgdp_sect`s'
}
*** Units that need imputed GDP (including 3 prefec cities with no py gdp data):
*** gdp_py is missing, except unit_status 0/1 rows matched in the city proper merge
gen impute = (gdp_py==.)
replace impute = 0 if inlist(unit_status, 0, 1) & _merge==3
drop _merge

*** Do population based imputation
* Each province remainder (dgdp_sect*) is split across the units flagged by
* impute in proportion to their census employment in the matching sector.
gen emp_sector1 = a384+a390
gen emp_sector2 = a387
gen emp_sector3 = a381-emp_sector2-emp_sector1
* City-level employment enters once per city05-year through the first flag
forvalues s = 1/3 {
    egen ctemp_sect`s' = sum(cemp_sect`s'*first), by(province_code90 year)
}
* Denominator: employment of imputed units plus the city-level sum
forvalues s = 1/3 {
    egen totemp_sect`s' = sum(emp_sector`s'*impute), by(province_code90 year)
    replace totemp_sect`s' = totemp_sect`s'+ctemp_sect`s' if ctemp_sect`s'!=.
}
* Shares are forced to 0 for units that are not imputed
forvalues s = 1/3 {
    gen frac`s' = emp_sector`s'/totemp_sect`s'
    replace frac`s' = 0 if impute==0
}
forvalues s = 1/3 {
    gen gdp_predx`s' = frac`s'*dgdp_sect`s'
}
gen gdp_predc = gdp_predx1+gdp_predx2+gdp_predx3
gen gdp_sect2_predc = gdp_predx2
* Predictions are only defined for imputed units
foreach pvar in gdp_predc gdp_sect2_predc {
    replace `pvar' = . if impute==0
}
drop gdp_predx1 gdp_predx2 gdp_predx3

*** Fill in gdp_predc for all urban units in 1990 w/gdp numbers (to be used for the 8 downgraded by 2010)
* The city GDP figures (cgdp_sect*) are shared out within each city05-year
* using c_emp* employment; only cp90==1, unit_status==1 rows count in the
* denominators.
gen empx_sector1 = c_emp_ag+c_emp_min
gen empx_sector2 = c_emp_man
gen empx_sector3 = c_emp-empx_sector2-empx_sector1
forvalues s = 1/3 {
    egen totempx_sect`s' = sum(empx_sector`s'*(cp90==1 & unit_status==1)), by(city05 year)
}
forvalues s = 1/3 {
    gen fracx`s' = empx_sector`s'/totempx_sect`s'
}
forvalues s = 1/3 {
    gen gdp_predx`s' = fracx`s'*cgdp_sect`s'
}
* Only 1990 urban rows that are still missing a prediction are filled
replace gdp_predc = gdp_predx1+gdp_predx2+gdp_predx3 if unit_status==1 & year==1990 & gdp_predc==.
replace gdp_sect2_predc = gdp_predx2 if unit_status==1 & year==1990 & gdp_sect2_predc==.
drop gdp_predx*

*** Fill in gdp_predc for all urban units in 1990 that need to be imputed (no 100% census count data)
* The within-city share (fracx*) is applied to the city's portion of the
* province remainder, i.e. (cemp_sect / totemp_sect) * dgdp_sect.
forvalues s = 1/3 {
    gen gdp_predx`s' = fracx`s'*(cemp_sect`s'/totemp_sect`s')*dgdp_sect`s'
}
replace gdp_predc = gdp_predx1+gdp_predx2+gdp_predx3 if unit_status==1 & year==1990 & gdp_predc==.
replace gdp_sect2_predc = gdp_predx2 if unit_status==1 & year==1990 & gdp_sect2_predc==.
*** A few units cannot be imputed (no c_totalPop); their portions have gone to nearby units
foreach pvar in gdp_predc gdp_sect2_predc {
    replace `pvar' = 0 if unit_status==1 & year==1990 & `pvar'==. & c_totalPop==.
}
* gdp_predx* is deliberately left in memory here
drop empx_sect* totempx_sect* fracx*

**** Perform similar exercise with the fenxian data
** For areas with no Fenxian data, do as above
gen nofenxian = (gdp_fenxian==.)
*** Impute a few missing sector breakdown fenxian obs from employment shares
forvalues s = 1/3 {
    replace gdp_sector`s'_fenxian = gdp_fenxian*emp_sector`s'/a381 if gdp_sector`s'_fenxian==.
}
* Province-level sum of the employment shares held by units without fenxian data
forvalues s = 1/3 {
    egen frac`s'nofenxian = sum(frac`s'*nofenxian), by(province_code90 year)
}
* Province-level fenxian totals over the imputed units
forvalues s = 1/3 {
    egen totfen_sect`s' = sum(gdp_sector`s'_fenxian*impute), by(province_code90 year)
}
forvalues s = 1/3 {
    gen fracf`s' = gdp_sector`s'_fenxian/totfen_sect`s'
}
* Fenxian shares allocate what is left of the province remainder after the
* no-fenxian units have taken their employment-based share
forvalues s = 1/3 {
    gen gdp_predy`s' = fracf`s'*(1-frac`s'nofenxian)*impute*dgdp_sect`s'
}
* Fall back on the census-based prediction wherever the fenxian one is missing
gen gdp_predf = gdp_predy1+gdp_predy2+gdp_predy3
replace gdp_predf = gdp_predc if gdp_predf==.
gen gdp_sect2_predf = gdp_predy2
replace gdp_sect2_predf = gdp_sect2_predc if gdp_sect2_predf==.
* Blank out units that are not imputed ...
replace gdp_predf = . if impute==0
replace gdp_sect2_predf = . if impute==0
* ... then give 1990 urban units the census-based numbers
replace gdp_predf = gdp_predc if unit_status==1 & year==1990 & gdp_predf==.
replace gdp_sect2_predf = gdp_sect2_predc if unit_status==1 & year==1990 & gdp_sect2_predf==.
* Clean up intermediates from this and the preceding 1990 steps
drop dgdp_* ccgdp_* ctgdp_* province_code90 impute emp_sect*
drop frac1 frac2 frac3 ctemp_sect* frac1n* totfen_* first gdp_predy* gdp_predx*

*** Create predicted rural GDP in 2000, 2005 & 2010 for a few obs
* Per year: regress gdp_michigan on census covariates over unit_status 2/3
* units, then use the fitted value where gdp_michigan is missing for such a
* unit. The fitted-value variable is dropped again after each year.
foreach yr in 2000 2005 {
    reg gdp_michigan c_totalPop c_emp-c_emp_tran if year==`yr' & inlist(unit_status, 2, 3)
    predict gdp_predx if year==`yr'
    replace gdp_michigan = gdp_predx if year==`yr' & gdp_michigan==. & inlist(unit_status, 2, 3)
    drop gdp_predx
}
* 2010 uses a different covariate set (a102 and aN60xx variables)
reg gdp_michigan a102 aN6003 aN6005-aN6014 aN6016-aN6021 if year==2010 & inlist(unit_status, 2, 3)
predict gdp_predx if year==2010
replace gdp_michigan = gdp_predx if year==2010 & gdp_michigan==. & inlist(unit_status, 2, 3)
drop gdp_predx
drop nofenxian frac2nofenxian frac3nofenxian fracf1 fracf2 fracf3 pgdp_*

*** Predict Net Assets in 1990 (very crude) for rural cp10=1 units with core covered
* Log-log regression of net assets on a387 over 1990 unit_status 2/3 units.
gen lasset_n_qz=ln(asset_n_qz)
gen la387 = ln(a387)
reg lasset_n_qz la387 if year==1990 & inlist(unit_status, 2, 3)
predict lassetn if year==1990 & cp10==1 & inlist(unit_status, 2, 3)
* dasset is 1 when assets are reported or the unit has unit_status 1;
* okcp is 1 for cp10==1 rows whose city05-year contains at least one such unit.
gen dasset = (asset_n_qz!=.) | (unit_status==1)
egen okcp = max(dasset) if cp10==1, by(city05 year)
* Back-transform the log prediction, adding half the squared regression RMSE.
* This overwrites asset_n_qz for every qualifying row, reported or not.
replace asset_n_qz = exp(lassetn+(e(rmse)^2)/2) if year==1990 & cp10==1 & okcp==1 & inlist(unit_status, 2, 3)
drop dasset lassetn la387 okcp

*** This is so it can be combined with the prefecture level data
* The existing culland is discarded and cularea takes over its name.
drop culland
rename cularea culland

*** Predict Sector 2 GDP in 2010 by allocating reported numbers using manufacturing employment shares
* City figures (cgdp_*) are shared among the 2010 city units (unit_status 0 or 1)
* and the prefecture remainder (pgdp_* less cgdp_*) among all other 2010 units,
* in proportion to employment: a381 for total GDP, aN6005 for sector 2.
sort city05 year
merge city05 year using ../../data/tabular_data_BJ/generated/pf10_gdp.dta
tab _merge year
* NOTE(review): this old-style merge has no update option, so if pf10_gdp.dta
* carries cgdp_* variables that are already in memory, the in-memory values
* win. Confirm that the 2010 rows really hold the intended city figures.
gen city = year==2010 & (unit_status==1|unit_status==0)
drop cemp_sect2
egen cemp = sum(a381*city), by(city05 year)
* Prefecture totals have to be formed within city05-year cells, exactly as for
* pfemp_sect2 below. Grouping on the 0/1 indicator city instead would pool
* every prefecture together and break the (pfemp-cemp) denominator.
egen pfemp = sum(a381), by(city05 year)
egen cemp_sect2 = sum(aN6005*city), by(city05 year)
egen pfemp_sect2 = sum(aN6005), by(city05 year)
replace gdp_predc = cgdp_py*a381/cemp if city==1
replace gdp_predc = (pgdp_py-cgdp_py)*a381/(pfemp-cemp) if city==0 & year==2010
replace gdp_sect2_predc = cgdp_sect2*aN6005/cemp_sect2 if city==1
replace gdp_sect2_predc = (pgdp_sect2-cgdp_sect2)*aN6005/(pfemp_sect2-cemp_sect2) if city==0 & year==2010
drop city cemp pfemp cgdp_py pgdp_py cemp_sect2 pfemp_sect2 pgdp_* cgdp_* _merge


***************** 8. Label Variables ***************************

* Labels for a101-a146 (household and population counts by residence type and
* by household registration type). The right-hand side of some labels carries
* a table reference (Table 2-19, 2-11, 2-12) and, on a103-a106, the legend for
* the abbreviations: H=household(s), M=male(s), F=female(s), P=population (M+F).
* Labels that start with blanks and a colon presumably continue the category
* named in the nearest full label above them.
label variable a101 "Total no. of H                                            Table 2-19"
label variable a102 "Total P                                                            "
label variable a103 "Total M                                   H=household(s)           "
label variable a104 "Total F                                   M=male(s)                "
label variable a105 "Residents of urban wards: No. of H        F=female(s)              "
label variable a106 "                     : P               P=population (M+F)       "
label variable a107 "                     : M                                        "
label variable a108 "                     : F                                        "
label variable a109 "Residents of villages: No. of H                                    "
label variable a110 "                  : P                                           "
label variable a111 "                  : M                                           "
label variable a112 "                  : F                                           "
label variable a113 "Residents of rural units outside village jurisdiction: No. of H    "
label variable a114 "                                                  : P           "
label variable a115 "                                                  : M           "
label variable a116 "                                                  : F           "
label variable a117 "Non-agricultural H: P                                              "
label variable a118 "               : M                                              "
label variable a119 "               : F                                              "
label variable a120 "Agricultural H: P                                                  "
label variable a121 "           : M                                                  "
label variable a122 "           : F                                                  "
label variable a123 "Specially designated H and persons outside any H: P                "
label variable a124 "                                             : M                "
label variable a125 "                                             : F                "
label variable a126 "All residents of all zhen (townships) combined: No. of H  Table 2-11"
label variable a127 "                                       : P                  "
label variable a128 "                                       : M                  "
label variable a129 "                                       : F                  "
label variable a130 "Town residents in all zhen combined: No. of H                      "
label variable a131 "                                 : P                             "
label variable a132 "                                 : M                             "
label variable a133 "                                 : F                             "
label variable a134 "Rural residents in all zhen combined: No. of H                     "
label variable a135 "                                  : P                            "
label variable a136 "                                  : M                            "
label variable a137 "                                  : F                            "
label variable a138 "Non-agricultural H in all zhen combined: P                Table 2-12"
label variable a139 "                                     : M                         "
label variable a140 "                                     : F                         "
label variable a141 "Agricultural H in all zhen combined: P                             "
label variable a142 "                              : M                             "
label variable a143 "                           : F                                "
label variable a144 "Specially designated H, etc. in all zhen combined: P               "
label variable a145 "                                            : M               "
label variable a146 "                                            : F               "
* Labels for a181-a246: totals, then five-year age bands from 0-4 up to 100+,
* each given as P, M and F. The trailing codes A1-A66 are part of the label
* text and number the cells in sequence.
label variable a181 "Total P, all ages and both sexes                                    A1"
label variable a182 "Total M, all ages                                                   A2"
label variable a183 "Total F, all ages                                                   A3"
label variable a184 "P 0-4                                                               A4"
label variable a185 "M 0-4                                                               A5"
label variable a186 "F 0-4                                                               A6"
label variable a187 "P 5-9                                                               A7"
label variable a188 "M 5-9                                                               A8"
label variable a189 "F 5-9                                                               A9"
label variable a190 "P 10-14                                                            A10"
label variable a191 "M 10-14                                                            A11"
label variable a192 "F 10-14                                                            A12"
label variable a193 "P 15-19                                                            A13"
label variable a194 "M 15-19                                                            A14"
label variable a195 "F 15-19                                                            A15"
label variable a196 "P 20-24                                                            A16"
label variable a197 "M 20-24                                                            A17"
label variable a198 "F 20-24                                                            A18"
label variable a199 "P 25-29                                                            A19"
label variable a200 "M 25-29                                                            A20"
label variable a201 "F 25-29                                                            A21"
label variable a202 "P 30-34                                                            A22"
label variable a203 "M 30-34                                                            A23"
label variable a204 "F 30-34                                                            A24"
label variable a205 "P 35-39                                                            A25"
label variable a206 "M 35-39                                                            A26"
label variable a207 "F 35-39                                                            A27"
label variable a208 "P 40-44                                                            A28"
label variable a209 "M 40-44                                                            A29"
label variable a210 "F 40-44                                                            A30"
label variable a211 "P 45-49                                                            A31"
label variable a212 "M 45-49                                                            A32"
label variable a213 "F 45-49                                                            A33"
label variable a214 "P 50-54                                                            A34"
label variable a215 "M 50-54                                                            A35"
label variable a216 "F 50-54                                                            A36"
label variable a217 "P 55-59                                                            A37"
label variable a218 "M 55-59                                                            A38"
label variable a219 "F 55-59                                                            A39"
label variable a220 "P 60-64                                                            A40"
label variable a221 "M 60-64                                                            A41"
label variable a222 "F 60-64                                                            A42"
label variable a223 "P 65-69                                                            A43"
label variable a224 "M 65-69                                                            A44"
label variable a225 "F 65-69                                                            A45"
label variable a226 "P 70-74                                                            A46"
label variable a227 "M 70-74                                                            A47"
label variable a228 "F 70-74                                                            A48"
label variable a229 "P 75-79                                                            A49"
label variable a230 "M 75-79                                                            A50"
label variable a231 "F 75-79                                                            A51"
label variable a232 "P 80-84                                                            A52"
label variable a233 "M 80-84                                                            A53"
label variable a234 "F 80-84                                                            A54"
label variable a235 "P 85-89                                                            A55"
label variable a236 "M 85-89                                                            A56"
label variable a237 "F 85-89                                                            A57"
label variable a238 "P 90-94                                                            A58"
label variable a239 "M 90-94                                                            A59"
label variable a240 "F 90-94                                                            A60"
label variable a241 "P 95-99                                                            A61"
label variable a242 "M 95-99                                                            A62"
label variable a243 "F 95-99                                                            A63"
label variable a244 "P 100+                                                             A64"
label variable a245 "M 100+                                                             A65"
label variable a246 "F 100+                                                             A66"
* Labels for a261-a284 (population aged 6+ by schooling level) and a291-a296
* (population aged 15+, illiterate/semi-illiterate). Both series reuse the
* trailing codes E1.. in their label text. A bare M or F label presumably
* refers to the category named on the P line just above it.
label variable a261 "Total P aged 6+                                                     E1"
label variable a262 "Total M aged 6+                                                     E2"
label variable a263 "Total F aged 6+                                                     E3"
label variable a264 "P university                                                        E4"
label variable a265 "M                                                                   E5"
label variable a266 "F                                                                   E6"
label variable a267 "P technical/junior college                                          E7"
label variable a268 "M                                                                   E8"
label variable a269 "F                                                                   E9"
label variable a270 "P secondary technical school                                       E10"
label variable a271 "M                                                                  E11"
label variable a272 "F                                                                  E12"
label variable a273 "P senior middle school                                             E13"
label variable a274 "M                                                                  E14"
label variable a275 "F                                                                  E15"
label variable a276 "P junior middle school                                             E16"
label variable a277 "M                                                                  E17"
label variable a278 "F                                                                  E18"
label variable a279 "P primary school                                                   E19"
label variable a280 "M                                                                  E20"
label variable a281 "F                                                                  E21"
label variable a282 "P illiterate/semi-illiterate                                       E22"
label variable a283 "M                                                                  E23"
label variable a284 "F                                                                  E24"
label variable a291 "Total P 15+                                                         E1"
label variable a292 "Total M 15+                                                         E2"
label variable a293 "Total F 15+                                                         E3"
label variable a294 "P illiterate/semi-illiterate                                        E4"
label variable a295 "M                                                                   E5"
label variable a296 "F                                                                   E6"
* Labels for a301-a315: population aged 15+ by marital status (never married,
* married, widowed, divorced), trailing codes M1-M15. A bare M or F label
* presumably refers to the status named on the P line just above it.
label variable a301 "Total P 15+                                                         M1"
label variable a302 "Total M 15+                                                         M2"
label variable a303 "Total F 15+                                                         M3"
label variable a304 "P never married                                                     M4"
label variable a305 "M                                                                   M5"
label variable a306 "F                                                                   M6"
label variable a307 "P married                                                           M7"
label variable a308 "M                                                                   M8"
label variable a309 "F                                                                   M9"
label variable a310 "P widowed                                                          M10"
label variable a311 "M                                                                  M11"
label variable a312 "F                                                                  M12"
label variable a313 "P divorced                                                         M13"
label variable a314 "M                                                                  M14"
label variable a315 "F                                                                  M15"
* Labels for a321-a332 (births, codes B1-B12) and a341-a352 (deaths, codes
* D1-D12): the full window 1Jan89-30Jun90 followed by its three half-year
* sub-periods, each as all / M / F.
label variable a321 "All births 1Jan89-30Jun90                                           B1"
label variable a322 "M births                                                            B2"
label variable a323 "F births                                                            B3"
label variable a324 "All births 1Jan89-30Jun89                                           B4"
label variable a325 "M births                                                            B5"
label variable a326 "F births                                                            B6"
label variable a327 "All births 1Jul89-31Dec89                                           B7"
label variable a328 "M births                                                            B8"
label variable a329 "F births                                                            B9"
label variable a330 "All births 1Jan90-30Jun90                                          B10"
label variable a331 "M births                                                           B11"
label variable a332 "F births                                                           B12"
label variable a341 "All deaths 1Jan89-30Jun90                                           D1"
label variable a342 "M deaths                                                            D2"
label variable a343 "F deaths                                                            D3"
label variable a344 "All deaths 1Jan89-30Jun89                                           D4"
label variable a345 "M deaths                                                            D5"
label variable a346 "F deaths                                                            D6"
label variable a347 "All deaths 1Jul89-31Dec89                                           D7"
label variable a348 "M deaths                                                            D8"
label variable a349 "F deaths                                                            D9"
label variable a350 "All deaths 1Jan90-30Jun90                                          D10"
label variable a351 "M deaths                                                           D11"
label variable a352 "F deaths                                                           D12"
* Labels for a361-a370: inmigrants, split into within-province and
* other-province origin and then by origin type (municipal cities, zhen,
* xiang), plus a residual "Other" row. Trailing codes R1-R10.
label variable a361 "Total inmigrants                                                    R1"
label variable a362 "Within-province inmigrants: Total                                   R2"
label variable a363 "                       : from municipal cities                   R3"
label variable a364 "                       : from zhen (urban townships)             R4"
label variable a365 "                       : from xiang (rural townships)            R5"
label variable a366 "Inmigrants from other provinces: Total                              R6"
label variable a367 "                         : from municipal cities                 R7"
label variable a368 "                         : from zhen                             R8"
label variable a369 "                         : from xiang                            R9"
label variable a370 "Other inmigrants                                                   R10"
* Labels for a381-a422: employed population by industry, trailing codes I1-I42.
* Section 7 of this file builds its sector employment from these: sector 1 is
* a384+a390, sector 2 is a387, and sector 3 is the rest of a381.
* A bare M or F label presumably refers to the industry named on the P line
* just above it.
label variable a381 "Total employed P                                                    I1"
label variable a382 "Total          M                                                    I2"
label variable a383 "Total          F                                                    I3"
label variable a384 "P Agric./forestry/animal husb./fishery/water conservancy            I4"
label variable a385 "M                                                                   I5"
label variable a386 "F                                                                   I6"
label variable a387 "P Industry                                                          I7"
label variable a388 "M                                                                   I8"
label variable a389 "F                                                                   I9"
label variable a390 "P Mining, prospecting                                              I10"
label variable a391 "M                                                                  I11"
label variable a392 "F                                                                  I12"
label variable a393 "P Construction                                                     I13"
label variable a394 "M                                                                  I14"
label variable a395 "F                                                                  I15"
label variable a396 "P Transport, posts, telecommunications                             I16"
label variable a397 "M                                                                  I17"
label variable a398 "F                                                                  I18"
label variable a399 "P Commerce, supply and marketing                                   I19"
label variable a400 "M                                                                  I20"
label variable a401 "F                                                                  I21"
label variable a402 "P Real estate, utilities, residential services                     I22"
label variable a403 "M                                                                  I23"
label variable a404 "F                                                                  I24"
label variable a405 "P Medicine, health care, sports, welfare                           I25"
label variable a406 "M                                                                  I26"
label variable a407 "F                                                                  I27"
label variable a408 "P Education, culture, arts, radio, television                      I28"
label variable a409 "M                                                                  I29"
label variable a410 "F                                                                  I30"
label variable a411 "P Science, technology                                              I31"
label variable a412 "M                                                                  I32"
label variable a413 "F                                                                  I33"
label variable a414 "P Finance, insurance                                               I34"
label variable a415 "M                                                                  I35"
label variable a416 "F                                                                  I36"
label variable a417 "P Government, party, and NGOs                                      I37"
label variable a418 "M                                                                  I38"
label variable a419 "F                                                                  I39"
label variable a420 "P Other economic activities                                        I40"
label variable a421 "M                                                                  I41"
label variable a422 "F                                                                  I42"
* Labels for a431-a454: employed population by occupation group, trailing
* codes O1-O24 (capital letter O). A bare M or F label presumably refers to
* the occupation named on the P line just above it.
label variable a431 "P Professional and high-level technical personnel                   O1"
label variable a432 "M                                                                   O2"
label variable a433 "F                                                                   O3"
label variable a434 "P Officials/managers in gov't, party, business, & NGOs              O4"
label variable a435 "M                                                                   O5"
label variable a436 "F                                                                   O6"
label variable a437 "P Clerical personnel                                                O7"
label variable a438 "M                                                                   O8"
label variable a439 "F                                                                   O9"
label variable a440 "P Employees in commercial sector                                   O10"
label variable a441 "M                                                                  O11"
label variable a442 "F                                                                  O12"
label variable a443 "P Employees in service sector                                      O13"
label variable a444 "M                                                                  O14"
label variable a445 "F                                                                  O15"
label variable a446 "P Workers in agric., forestry, animal husb., fisheries             O16"
label variable a447 "M                                                                  O17"
label variable a448 "F                                                                  O18"
label variable a449 "P Workers in manufacturing, construction, transport, etc.          O19"
label variable a450 "M                                                                  O20"
label variable a451 "F                                                                  O21"
label variable a452 "P Other and misc. occupations                                      O22"
label variable a453 "M                                                                  O23"
* The code below previously read 024 with a digit zero, breaking the O-series.
label variable a454 "F                                                                  O24"
* Ethnicity labels, a701-a874.
* The variables come in consecutive triples: the first of each triple gets a
* label starting with P that names the group, the second gets M and the third
* gets F (presumably persons / male / female - confirm against the codebook).

* For a702-a868 the M and F labels are the same blank-padded string in every
* triple, so they are assigned in one pass: k walks the M variables
* (702, 705, ..., 867) and k+1 is the matching F variable.
forvalues k = 702(3)867 {
    local kf = `k' + 1
    label variable a`k' "M        "
    label variable a`kf' "F        "
}

* Group names, one per triple. These stay as individual statements outside any
* loop because several of them contain a left single quote character.
* NOTE(review): those labels rely on Stata leaving an unmatched macro-open
* quote untouched - confirm they display as intended.
label var a701 "P Han Chinese"
label var a704 "P Mongol (Menggu) minority"
label var a707 "P Hui minority"
label var a710 "P Tibetan (Zang) minority"
label var a713 "P Uygur (Weiwu`er) minority"
label var a716 "P Miao minority"
label var a719 "P Yi minority"
label var a722 "P Zhuang minority"
label var a725 "P Bouyei (Buyi) minority"
label var a728 "P Korean (Chaoxian) minority"
label var a731 "P Manchu (Man) minority"
label var a734 "P Dong minority"
label var a737 "P Yao minority"
label var a740 "P Bai minority"
label var a743 "P Tujia minority"
label var a746 "P Hani minority"
label var a749 "P Kazak (Hasake) minority"
label var a752 "P Dai minority"
label var a755 "P Li minority"
label var a758 "P Lisu minority"
label var a761 "P Va (Wa) minority"
label var a764 "P She minority"
label var a767 "P Gaoshan minority"
label var a770 "P Lahu minority"
label var a773 "P Shui minority"
label var a776 "P Dongxiang minority"
label var a779 "P Naxi minority"
label var a782 "P Jingpo minority"
label var a785 "P Kirgiz (Ke`erkezi) minority"
label var a788 "P Tu minority"
label var a791 "P Daur (Dawo`er) minority"
label var a794 "P Mulam (Mulao) minority"
label var a797 "P Qiang minority"
label var a800 "P Blang (Bulang) minority"
label var a803 "P Salar (Sala) minority"
label var a806 "P Maonan minority"
label var a809 "P Gelo (Gelao) minority"
label var a812 "P Xibe (Xibo) minority"
label var a815 "P Achang minority"
label var a818 "P Pumi minority"
label var a821 "P Tajik (Tajike) minority"
label var a824 "P Nu minority"
label var a827 "P Uzbek (Wuzibieke) minority"
label var a830 "P Russian (Eluosi) minority"
label var a833 "P Ewenki (Ewenke) minority"
label var a836 "P De'ang minority"
label var a839 "P Bonan (Bao`an) minority"
label var a842 "P Yugur (Yugu) minority"
label var a845 "P Jing minority"
label var a848 "P Tatar (Tata`er) minority"
label var a851 "P Drung (Dulong) minority"
label var a854 "P Oroqen (Elunchun) minority"
label var a857 "P Hezhen (Hezhe) minority"
label var a860 "P Monba (Menba) minority"
label var a863 "P Lhopa (Luoba) minority"
label var a866 "P Jinuo minority"

* The last two triples use their own padding widths, so they are written out.
label var a869 "P nationality/ethnicity unknown"
label var a870 "M                                     "
label var a871 "F                                     "
label var a872 "P naturalized citizens"
label var a873 "M                       "
label var a874 "F                       "
* Labels for the identifier variables and the predicted-GDP variables.
* Fix: the census_code label was missing its closing parenthesis.
label variable census_code "original census code (Loren's Data)"
label variable gbcenmq "original census code, 1990 aggregate data"
label variable gdp_predc "predicted gdp for rural counties, census pop allocation only"
label variable gdp_predf "predicted gdp for rural counties, fenxian gdp allocation"


************* 9. Scale up Census Variables Using Pop Weights ****************************

*** Scale up census variables
* Every c_* variable is rescaled with a rule that depends on the census year:
*   1982: multiply by 100
*   1990: divide by a province-specific factor (table below)
*   2000: multiply by actpop/sampop
*   2005: multiply by 500
* The year conditions are mutually exclusive, so each observation is touched by
* at most one rule.

* Unscaled copy of the sample total. c_totalPop matches c_* and is therefore
* rescaled inside the loop, so the 2000 ratio has to use this copy instead.
* Stored as double (gen defaults to float) so the denominator is an exact copy
* of c_totalPop whatever its magnitude or storage type.
gen double sampop = c_totalPop

* 1990 province factors, looked up once per observation instead of once per
* c_* variable. Values are all close to 0.01 - presumably provincial sampling
* rates; confirm against the data documentation. Provinces 500000 and 510000
* carry the same factor.
* NOTE(review): presumably because 500000 was split out of 510000 after 1990 -
* confirm.
tempvar rate90
gen double `rate90' = .
replace `rate90' = 0.0104856948259734  if province_code==110000
replace `rate90' = 0.0130791966036386  if province_code==120000
replace `rate90' = 0.00976147989113532 if province_code==130000
replace `rate90' = 0.0104715690183259  if province_code==140000
replace `rate90' = 0.0118723679087625  if province_code==150000
replace `rate90' = 0.0107576598978953  if province_code==210000
replace `rate90' = 0.010814713382742   if province_code==220000
replace `rate90' = 0.0110158000569816  if province_code==230000
replace `rate90' = 0.0115695700221318  if province_code==310000
replace `rate90' = 0.0103751284793056  if province_code==320000
replace `rate90' = 0.0104590486930804  if province_code==330000
replace `rate90' = 0.0120020335056383  if province_code==340000
replace `rate90' = 0.0114160505034443  if province_code==350000
replace `rate90' = 0.011929876629665   if province_code==360000
replace `rate90' = 0.00980405597741145 if province_code==370000
replace `rate90' = 0.0101516748980099  if province_code==410000
replace `rate90' = 0.0104176992770507  if province_code==420000
replace `rate90' = 0.0103084163513093  if province_code==430000
replace `rate90' = 0.0100002807610139  if province_code==440000
replace `rate90' = 0.010567331423635   if province_code==450000
replace `rate90' = 0.0110877925398804  if province_code==460000
replace `rate90' = 0.00980917665888599 if province_code==500000
replace `rate90' = 0.00980917665888599 if province_code==510000
replace `rate90' = 0.00961913386857969 if province_code==520000
replace `rate90' = 0.0101087805269901  if province_code==530000
replace `rate90' = 0.0110086019644719  if province_code==540000
replace `rate90' = 0.0103511899662564  if province_code==610000
replace `rate90' = 0.0108154966257644  if province_code==620000
replace `rate90' = 0.0128754084074611  if province_code==630000
replace `rate90' = 0.0083057473916061  if province_code==640000
replace `rate90' = 0.0102265947680152  if province_code==650000

foreach X of varlist c_* {
    replace `X' = `X'*100 if year==1982

    * Provinces without a factor in the table above are left unscaled
    replace `X' = `X'/`rate90' if year==1990 & `rate90'<.

    **** Use 100% counts as weights in 2000, excluding special districts
    replace `X' = `X'*actpop/sampop if year==2000 & sampop~=0

    *** Loren's programs create a uniform 0.2% sample, so multiply by 500
    replace `X' = `X'*500 if year==2005
}

* Remove the lookup column explicitly so it cannot end up in a saved dataset
drop `rate90'

**** Drop unneeded variables
* Kept as ONE drop command: the first-last ranges are resolved against the
* variable order as it stands when the command runs, so the list is only split
* across lines, not across statements.
drop GbProv-County_EN                 ///
     mrg_*                            ///
     pgdp-nmhanzi                     ///
     countyCode                       ///
     ruralMigA1-urbanMig19to55HighA1  ///
     expnd-totemp_sect3               ///
     lasset_n_qz

* Order by unit and year, then write the combined census/tabular file
sort unit_code_08 year
save ..\..\data\tabular_data_BJ\generated\us123-census.dta, replace




* Delete the intermediate .dta files left in the working directory
* (temp_census.dta, for one, is saved in part 1). Files are removed in the
* order listed; erase stops the do-file with an error if one is missing.
local scratchfiles temp_census.dta temp.dta cnt2000_temp.dta ///
    cnt2005_temp.dta cnt2010_temp.dta cen0005.dta temp_1.dta ///
    temp82.dta temp820005.dta tempcen90.dta ///
    2010CountyCensusA.dta 2010CountyCensusL.dta

foreach scratch of local scratchfiles {
    erase `scratch'
}
